//link application and inventor data
//to do this, merge application-assignee and inventor-gender data

// Load the inventor-gender file as the master dataset and attach the
// application-level file to it by application_number.
// m:1 => the master may hold several rows per application_number, while the
// using file must be unique on application_number (merge errors out otherwise).
use "${filedata}inventor_gender.dta", clear
// keep(match) retains only rows found in both files (the old `_merge == 3`),
// and nogenerate skips creating the _merge bookkeeping variable altogether.
merge m:1 application_number using "${filedata}2014application_emp.dta", ///
    keep(match) nogenerate

// ID US apps.
// US_indicator is 1 on rows where US_state == 1 and 0 otherwise (including
// rows where US_state is missing).
// NOTE(review): presumably US_state == 1 marks a US-based inventor -- confirm
// against the codebook.
gen US_indicator = (US_state==1)
// Taking the within-application minimum means US_group is 1 only when every
// row of the application has US_indicator == 1.
bysort application_number: egen US_group= min(US_indicator)

// ID applications for which we don't know the gender of all inventors.
// missing() is true for system missing (.) AND extended missing (.a-.z);
// the bare comparison `female == .` would let extended codes slip through,
// and those rows would later be counted as inventors but ignored in the
// female total, understating the female share.
gen missing = missing(female)
// An application is flagged if at least one of its rows has unknown gender.
bysort application_number: egen missing_gender = max(missing)
// Drop every row of the flagged applications.
drop if missing_gender==1

// Drop rows that are exact duplicates on every variable, keeping one copy.
// TODO: find out why the merged data contain duplicate rows in the first place.
// (The former `duplicates tag, gen(dup)` / `drop dup` pair was removed: the
// tag was discarded immediately, so it had no effect on the data.
// `duplicates drop` itself reports how many observations were deleted.)
duplicates drop
// The gender-missing helper flags are no longer needed once the filter above
// has been applied.
drop missing missing_gender

// Per-application gender composition. Every variable below is constant
// within an application_number.
// NOTE(review): the shares assume female is coded 0/1 -- confirm.

// Number of rows (inventors) on the application.
bysort application_number: gen inventor_count= _N
gen solo_inventor= (inventor_count == 1)

// Within-application sum of female. total() is the documented name of the
// legacy egen sum() function; the result is the same.
bysort application_number: egen tot_female = total(female)

// The remaining measures are row-wise functions of variables that are already
// constant within application, so no by-prefix is required.
gen prop_female = tot_female/inventor_count
// At least one female inventor.
gen female_dummy = (prop_female> 0)
// Female share of at least one half.
gen half_female = (prop_female>=.50)
// Every inventor is female.
gen all_female= (prop_female == 1)

// Recode system-missing emp_assigned to 0.
replace emp_assigned = 0 if emp_assigned == .

// Shed variables that are not needed downstream and only take up space.
drop examiner_name_last examiner_name_first examiner_name_middle ///
    ee_name_raw invention_subject_matter invention_title ///
    confirm_number customer_number ///
    appl_status_code appl_status_date ///
    file_location file_location_date ///
    earliest_pgpub_number earliest_pgpub_date wipo_pub_number ///
    aia_first_to_file US_state abandon_date disposal_type

// Write the result; the saved file is application*inventor-level data.
save "${filedata}2014apps_emp_inventors_gender.dta", replace
